- 
                Notifications
    
You must be signed in to change notification settings  - Fork 45
 
Add Buffer Device Address Backend #311
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
          Codecov Report — Attention: Patch coverage is  
 
 Additional details and impacted files@@            Coverage Diff             @@
##           master     #311      +/-   ##
==========================================
+ Coverage   75.32%   77.07%   +1.74%     
==========================================
  Files          12       12              
  Lines         616      663      +47     
==========================================
+ Hits          464      511      +47     
  Misses        152      152              ☔ View full report in Codecov by Sentry. 🚀 New features to boost your workflow:
  | 
    
| 
          
 Your PR requires formatting changes to meet the project's style guidelines. Click here to view the suggested changes.

diff --git a/lib/cl/libopencl.jl b/lib/cl/libopencl.jl
index 1886fcd..afa8233 100644
--- a/lib/cl/libopencl.jl
+++ b/lib/cl/libopencl.jl
@@ -8,8 +8,9 @@ end
 
 function check(f)
     res = retry_reclaim(err -> err == CL_OUT_OF_RESOURCES ||
-                               err == CL_MEM_OBJECT_ALLOCATION_FAILURE ||
-                               err == CL_OUT_OF_HOST_MEMORY) do
+            err == CL_MEM_OBJECT_ALLOCATION_FAILURE ||
+            err == CL_OUT_OF_HOST_MEMORY
+    ) do
         return f()
     end
 
@@ -1330,13 +1331,17 @@ const clIcdSetPlatformDispatchDataKHR_fn = Ptr{clIcdSetPlatformDispatchDataKHR_t
 end
 
 function clIcdGetFunctionAddressForPlatformKHR(platform, func_name)
-    @ext_ccall libopencl.clIcdGetFunctionAddressForPlatformKHR(platform::cl_platform_id,
-                                                               func_name::Ptr{Cchar})::Ptr{Cvoid}
+    return @ext_ccall libopencl.clIcdGetFunctionAddressForPlatformKHR(
+        platform::cl_platform_id,
+        func_name::Ptr{Cchar}
+    )::Ptr{Cvoid}
 end
 
 @checked function clIcdSetPlatformDispatchDataKHR(platform, dispatch_data)
-    @ext_ccall libopencl.clIcdSetPlatformDispatchDataKHR(platform::cl_platform_id,
-                                                         dispatch_data::Ptr{Cvoid})::cl_int
+    @ext_ccall libopencl.clIcdSetPlatformDispatchDataKHR(
+        platform::cl_platform_id,
+        dispatch_data::Ptr{Cvoid}
+    )::cl_int
 end
 
 # typedef cl_program CL_API_CALL clCreateProgramWithILKHR_t ( cl_context context , const void * il , size_t length , cl_int * errcode_ret )
@@ -2054,7 +2059,8 @@ end
 
 @checked function clGetMemAllocInfoINTEL(context, ptr, param_name, param_value_size,
                                          param_value, param_value_size_ret)
-    @ext_ccall libopencl.clGetMemAllocInfoINTEL(context::cl_context, ptr::PtrOrCLPtr{Cvoid},
+    @ext_ccall libopencl.clGetMemAllocInfoINTEL(
+        context::cl_context, ptr::PtrOrCLPtr{Cvoid},
                                                 param_name::cl_mem_info_intel,
                                                 param_value_size::Csize_t,
                                                 param_value::Ptr{Cvoid},
@@ -2064,15 +2070,16 @@ end
 @checked function clSetKernelArgMemPointerINTEL(kernel, arg_index, arg_value)
     @ext_ccall libopencl.clSetKernelArgMemPointerINTEL(kernel::cl_kernel,
                                                        arg_index::cl_uint,
-                                                       arg_value::PtrOrCLPtr{Cvoid})::cl_int
+        arg_value::PtrOrCLPtr{Cvoid}
+    )::cl_int
 end
 
 @checked function clEnqueueMemFillINTEL(command_queue, dst_ptr, pattern, pattern_size, size,
                                         num_events_in_wait_list, event_wait_list, event)
     @ext_ccall libopencl.clEnqueueMemFillINTEL(command_queue::cl_command_queue,
-                                               dst_ptr::PtrOrCLPtr{Cvoid},
-                                               pattern::Ptr{Cvoid}, pattern_size::Csize_t,
-                                               size::Csize_t,
+        dst_ptr::PtrOrCLPtr{Cvoid},
+        pattern::Ptr{Cvoid}, pattern_size::Csize_t,
+        size::Csize_t,
                                                num_events_in_wait_list::cl_uint,
                                                event_wait_list::Ptr{cl_event},
                                                event::Ptr{cl_event})::cl_int
@@ -2091,7 +2098,7 @@ end
 @checked function clEnqueueMemAdviseINTEL(command_queue, ptr, size, advice,
                                           num_events_in_wait_list, event_wait_list, event)
     @ext_ccall libopencl.clEnqueueMemAdviseINTEL(command_queue::cl_command_queue,
-                                                 ptr::PtrOrCLPtr{Cvoid}, size::Csize_t,
+        ptr::PtrOrCLPtr{Cvoid}, size::Csize_t,
                                                  advice::cl_mem_advice_intel,
                                                  num_events_in_wait_list::cl_uint,
                                                  event_wait_list::Ptr{cl_event},
@@ -2106,7 +2113,7 @@ const clEnqueueMigrateMemINTEL_fn = Ptr{clEnqueueMigrateMemINTEL_t}
 @checked function clEnqueueMigrateMemINTEL(command_queue, ptr, size, flags,
                                            num_events_in_wait_list, event_wait_list, event)
     @ext_ccall libopencl.clEnqueueMigrateMemINTEL(command_queue::cl_command_queue,
-                                                  ptr::PtrOrCLPtr{Cvoid}, size::Csize_t,
+        ptr::PtrOrCLPtr{Cvoid}, size::Csize_t,
                                                   flags::cl_mem_migration_flags,
                                                   num_events_in_wait_list::cl_uint,
                                                   event_wait_list::Ptr{cl_event},
@@ -2250,9 +2257,11 @@ const clSetKernelArgDevicePointerEXT_t = Cvoid
 const clSetKernelArgDevicePointerEXT_fn = Ptr{clSetKernelArgDevicePointerEXT_t}
 
 @checked function clSetKernelArgDevicePointerEXT(kernel, arg_index, arg_value)
-    @ext_ccall libopencl.clSetKernelArgDevicePointerEXT(kernel::cl_kernel,
-                                                        arg_index::cl_uint,
-                                                        arg_value::cl_mem_device_address_ext)::cl_int
+    @ext_ccall libopencl.clSetKernelArgDevicePointerEXT(
+        kernel::cl_kernel,
+        arg_index::cl_uint,
+        arg_value::cl_mem_device_address_ext
+    )::cl_int
 end
 
 # typedef cl_int CL_API_CALL clCancelCommandsIMG_t ( const cl_event * event_list , size_t num_events_in_list )
@@ -2273,8 +2282,10 @@ const clSetPerfHintQCOM_t = Cvoid
 const clSetPerfHintQCOM_fn = Ptr{clSetPerfHintQCOM_t}
 
 @checked function clSetPerfHintQCOM(context, perf_hint)
-    @ext_ccall libopencl.clSetPerfHintQCOM(context::cl_context,
-                                           perf_hint::cl_perf_hint_qcom)::cl_int
+    @ext_ccall libopencl.clSetPerfHintQCOM(
+        context::cl_context,
+        perf_hint::cl_perf_hint_qcom
+    )::cl_int
 end
 
 const CL_NAME_VERSION_MAX_NAME_SIZE = 64
diff --git a/lib/cl/memory/buffer.jl b/lib/cl/memory/buffer.jl
index dfd198d..3d7a392 100644
--- a/lib/cl/memory/buffer.jl
+++ b/lib/cl/memory/buffer.jl
@@ -2,7 +2,7 @@
 
 struct Buffer <: AbstractMemoryObject
     id::cl_mem
-    ptr::Union{Nothing,CLPtr{Cvoid}}
+    ptr::Union{Nothing, CLPtr{Cvoid}}
     bytesize::Int
     context::Context
 end
@@ -17,8 +17,10 @@ context(buf::Buffer) = buf.context
 ## constructors
 
 # for internal use
-function Buffer(sz::Int, flags::Integer, hostbuf=nothing;
-                device=:rw, host=:rw, device_private_address=bda_supported(cl.device()))
+function Buffer(
+        sz::Int, flags::Integer, hostbuf = nothing;
+        device = :rw, host = :rw, device_private_address = bda_supported(cl.device())
+    )
     if device == :rw
         flags |= CL_MEM_READ_WRITE
     elseif device == :r
@@ -68,29 +70,33 @@ function Buffer(sz::Int, flags::Integer, hostbuf=nothing;
 end
 
 # allocated buffer
-function Buffer(sz::Integer; host_accessible=false, kwargs...)
+function Buffer(sz::Integer; host_accessible = false, kwargs...)
     flags = host_accessible ? CL_MEM_ALLOC_HOST_PTR : 0
-    Buffer(sz, flags, nothing; kwargs...)
+    return Buffer(sz, flags, nothing; kwargs...)
 end
 
 # from host memory
-function Buffer(hostbuf::Array; copy::Bool=true, kwargs...)
+function Buffer(hostbuf::Array; copy::Bool = true, kwargs...)
     flags = copy ? CL_MEM_COPY_HOST_PTR : CL_MEM_USE_HOST_PTR
-    Buffer(sizeof(hostbuf), flags, hostbuf; kwargs...)
+    return Buffer(sizeof(hostbuf), flags, hostbuf; kwargs...)
 end
 
 
 ## memory operations
 
 # reading from buffer to host array, return an event
-function enqueue_read(dst::Ptr, src::Buffer, src_off::Int, nbytes::Int;
-                      blocking::Bool=false, wait_for::Vector{Event}=Event[])
-    n_evts  = length(wait_for)
+function enqueue_read(
+        dst::Ptr, src::Buffer, src_off::Int, nbytes::Int;
+        blocking::Bool = false, wait_for::Vector{Event} = Event[]
+    )
+    n_evts = length(wait_for)
     evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
-    GC.@preserve wait_for begin
+    return GC.@preserve wait_for begin
         ret_evt = Ref{cl_event}()
-        clEnqueueReadBuffer(queue(), src, blocking, src_off, nbytes, dst,
-                            n_evts, evt_ids, ret_evt)
+        clEnqueueReadBuffer(
+            queue(), src, blocking, src_off, nbytes, dst,
+            n_evts, evt_ids, ret_evt
+        )
         @return_nanny_event(ret_evt[], dst)
     end
 end
@@ -98,14 +104,18 @@ enqueue_read(dst::Ptr, src::Buffer, nbytes; kwargs...) =
     enqueue_read(dst, src, 0, nbytes; kwargs...)
 
 # writing from host array to buffer, return an event
-function enqueue_write(dst::Buffer, dst_off::Int, src::Ptr, nbytes::Int;
-                       blocking::Bool=false, wait_for::Vector{Event}=Event[])
-    n_evts  = length(wait_for)
+function enqueue_write(
+        dst::Buffer, dst_off::Int, src::Ptr, nbytes::Int;
+        blocking::Bool = false, wait_for::Vector{Event} = Event[]
+    )
+    n_evts = length(wait_for)
     evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
-    GC.@preserve wait_for begin
+    return GC.@preserve wait_for begin
         ret_evt = Ref{cl_event}()
-        clEnqueueWriteBuffer(queue(), dst, blocking, dst_off, nbytes, src,
-                             n_evts, evt_ids, ret_evt)
+        clEnqueueWriteBuffer(
+            queue(), dst, blocking, dst_off, nbytes, src,
+            n_evts, evt_ids, ret_evt
+        )
         @return_nanny_event(ret_evt[], dst)
     end
 end
@@ -113,15 +123,19 @@ enqueue_write(dst::Buffer, src::Ptr, nbytes; kwargs...) =
     enqueue_write(dst, 0, src, nbytes; kwargs...)
 
 # copying between two buffers, return an event
-function enqueue_copy(dst::Buffer, dst_off::Int, src::Buffer, src_off::Int,
-                      nbytes::Int; blocking::Bool=false,
-                      wait_for::Vector{Event}=Event[])
-    n_evts  = length(wait_for)
+function enqueue_copy(
+        dst::Buffer, dst_off::Int, src::Buffer, src_off::Int,
+        nbytes::Int; blocking::Bool = false,
+        wait_for::Vector{Event} = Event[]
+    )
+    n_evts = length(wait_for)
     evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
-    GC.@preserve wait_for begin
+    return GC.@preserve wait_for begin
         ret_evt = Ref{cl_event}()
-        clEnqueueCopyBuffer(queue(), src, dst, src_off, dst_off, nbytes,
-                            n_evts, evt_ids, ret_evt)
+        clEnqueueCopyBuffer(
+            queue(), src, dst, src_off, dst_off, nbytes,
+            n_evts, evt_ids, ret_evt
+        )
         @return_event ret_evt[]
     end
 end
@@ -129,8 +143,10 @@ enqueue_copy(dst::Buffer, src::Buffer, N; kwargs...) =
     enqueue_copy(dst, 0, src, 0, N; kwargs...)
 
 # map a buffer into the host address space, returning a pointer and an event
-function enqueue_map(buf::Buffer, offset::Integer, nbytes::Int, flags=:rw;
-                     blocking::Bool=false, wait_for::Vector{Event}=Event[])
+function enqueue_map(
+        buf::Buffer, offset::Integer, nbytes::Int, flags = :rw;
+        blocking::Bool = false, wait_for::Vector{Event} = Event[]
+    )
     flags = if flags == :rw
         CL_MAP_READ | CL_MAP_WRITE
     elseif flags == :r
@@ -142,12 +158,14 @@ function enqueue_map(buf::Buffer, offset::Integer, nbytes::Int, flags=:rw;
     end
 
     ret_evt = Ref{cl_event}()
-    n_evts  = length(wait_for)
+    n_evts = length(wait_for)
     evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
     GC.@preserve wait_for begin
-        status  = Ref{Cint}()
-        ptr = clEnqueueMapBuffer(queue(), buf, blocking, flags, offset, nbytes,
-                                 n_evts, evt_ids, ret_evt, status)
+        status = Ref{Cint}()
+        ptr = clEnqueueMapBuffer(
+            queue(), buf, blocking, flags, offset, nbytes,
+            n_evts, evt_ids, ret_evt, status
+        )
         if status[] != CL_SUCCESS
             throw(CLError(status[]))
         end
@@ -155,12 +173,12 @@ function enqueue_map(buf::Buffer, offset::Integer, nbytes::Int, flags=:rw;
         return ptr, Event(ret_evt[])
     end
 end
-enqueue_map(buf::Buffer, nbytes::Int, flags=:rw; kwargs...) =
+enqueue_map(buf::Buffer, nbytes::Int, flags = :rw; kwargs...) =
     enqueue_map(buf, 0, nbytes, flags; kwargs...)
 
 # unmap a buffer, return an event
-function enqueue_unmap(buf::Buffer, ptr::Ptr; wait_for::Vector{Event}=Event[])
-    n_evts  = length(wait_for)
+function enqueue_unmap(buf::Buffer, ptr::Ptr; wait_for::Vector{Event} = Event[])
+    n_evts = length(wait_for)
     evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
     GC.@preserve wait_for begin
         ret_evt = Ref{cl_event}()
@@ -170,18 +188,22 @@ function enqueue_unmap(buf::Buffer, ptr::Ptr; wait_for::Vector{Event}=Event[])
 end
 
 # fill a buffer with a pattern, returning an event
-function enqueue_fill(buf::Buffer, offset::Integer, pattern::T, N::Integer;
-                      wait_for::Vector{Event}=Event[]) where {T}
+function enqueue_fill(
+        buf::Buffer, offset::Integer, pattern::T, N::Integer;
+        wait_for::Vector{Event} = Event[]
+    ) where {T}
     nbytes = N * sizeof(T)
     nbytes_pattern = sizeof(T)
     @assert nbytes_pattern > 0
-    n_evts  = length(wait_for)
+    n_evts = length(wait_for)
     evt_ids = isempty(wait_for) ? C_NULL : [pointer(evt) for evt in wait_for]
-    GC.@preserve begin
+    return GC.@preserve begin
         ret_evt = Ref{cl_event}()
-        clEnqueueFillBuffer(queue(), buf, [pattern],
-                            nbytes_pattern, offset, nbytes,
-                            n_evts, evt_ids, ret_evt)
+        clEnqueueFillBuffer(
+            queue(), buf, [pattern],
+            nbytes_pattern, offset, nbytes,
+            n_evts, evt_ids, ret_evt
+        )
         @return_event ret_evt[]
     end
 end
diff --git a/lib/cl/memory/usm.jl b/lib/cl/memory/usm.jl
index a12bc52..9aec2f2 100644
--- a/lib/cl/memory/usm.jl
+++ b/lib/cl/memory/usm.jl
@@ -1,7 +1,7 @@
 abstract type UnifiedMemory <: AbstractPointerMemory end
 
 function usm_free(mem::UnifiedMemory; blocking::Bool = false)
-    if blocking
+    return if blocking
         clMemBlockingFreeINTEL(context(mem), mem)
     else
         clMemFreeINTEL(context(mem), mem)
diff --git a/lib/cl/state.jl b/lib/cl/state.jl
index de44bd5..46a2943 100644
--- a/lib/cl/state.jl
+++ b/lib/cl/state.jl
@@ -212,7 +212,7 @@ function default_memory_backend(dev::Device)
         error("Unknown memory backend '$backend_str' requested")
     end
     in(backend, supported_backends) ? backend : nothing
-    backend
+    return backend
 end
 
 function memory_backend()
@@ -224,7 +224,7 @@ function memory_backend()
         end
         if backend === BufferBackend() && !bda_supported(dev)
             @warn """Your device $(dev.name) does not support the necessary extensions for OpenCL.jl's memory management (requiring either USM, coarse-grained SVM, or BDA).
-                     Falling back to plain OpenCL buffers, which severely limits compatibility with other OpenCL.jl, only supporting OpenCL C kernels.""" maxlog=1 _id="memory_backend_$(dev.name)"
+            Falling back to plain OpenCL buffers, which severely limits compatibility with other OpenCL.jl, only supporting OpenCL C kernels.""" maxlog = 1 _id = "memory_backend_$(dev.name)"
         end
         backend
     end
diff --git a/src/array.jl b/src/array.jl
index ee5ee63..28655e2 100644
--- a/src/array.jl
+++ b/src/array.jl
@@ -52,7 +52,7 @@ mutable struct CLArray{T, N, M} <: AbstractGPUArray{T, N}
             maxsize
         end
         data = GPUArrays.cached_alloc((CLArray, cl.context(), M, bufsize)) do
-            buf = managed_alloc(M, bufsize; alignment=Base.datatype_alignment(T))
+            buf = managed_alloc(M, bufsize; alignment = Base.datatype_alignment(T))
             DataRef(free, buf)
         end
         obj = new{T, N, M}(data, maxsize, 0, dims)
@@ -392,20 +392,26 @@ for (srcty, dstty) in [(:Array, :CLArray), (:CLArray, :Array), (:CLArray, :CLArr
                     cl.enqueue_usm_copy(pointer(dst, dst_off), pointer(src, src_off), nbytes; blocking)
                 else
                     if src isa CLArray && dst isa CLArray
-                        cl.enqueue_copy(convert(cl.Buffer, dst.data[]),
+                        cl.enqueue_copy(
+                            convert(cl.Buffer, dst.data[]),
                             (dst.offset * Base.elsize(dst)) + (dst_off - 1) * sizeof(T),
                             convert(cl.Buffer, src.data[]),
                             (src.offset * Base.elsize(src)) + (src_off - 1) * sizeof(T),
-                            nbytes; blocking)
+                            nbytes; blocking
+                        )
                     elseif dst isa CLArray
-                        cl.enqueue_write(convert(cl.Buffer, dst.data[]),
+                        cl.enqueue_write(
+                            convert(cl.Buffer, dst.data[]),
                             (dst.offset * Base.elsize(dst)) + (dst_off - 1) * sizeof(T),
-                            pointer(src, src_off), nbytes; blocking)
+                            pointer(src, src_off), nbytes; blocking
+                        )
                     elseif src isa CLArray
-                        cl.enqueue_read(pointer(dst, dst_off),
+                        cl.enqueue_read(
+                            pointer(dst, dst_off),
                             convert(cl.Buffer, src.data[]),
                             (src.offset * Base.elsize(src)) + (src_off - 1) * sizeof(T),
-                            nbytes; blocking)
+                            nbytes; blocking
+                        )
                     end
                 end
             end
@@ -522,7 +528,7 @@ function Base.resize!(a::CLVector{T}, n::Integer) where {T}
     # replace the data with a new CL. this 'unshares' the array.
     # as a result, we can safely support resizing unowned buffers.
     new_data = cl.context!(context(a)) do
-        mem = managed_alloc(memtype(a), bufsize; alignment=Base.datatype_alignment(T))
+        mem = managed_alloc(memtype(a), bufsize; alignment = Base.datatype_alignment(T))
         ptr = convert(CLPtr{T}, mem)
         m = min(length(a), n)
         if m > 0
@@ -532,7 +538,7 @@ function Base.resize!(a::CLVector{T}, n::Integer) where {T}
                 elseif memtype(a) <: cl.UnifiedMemory
                     cl.enqueue_usm_copy(ptr, pointer(a), m*sizeof(T); blocking=false)
                 else
-                    cl.enqueue_copy(convert(cl.Buffer, mem), 0, convert(cl.Buffer, a.data[]), a.offset * Base.elsize(a), m*sizeof(T); blocking=false)
+                    cl.enqueue_copy(convert(cl.Buffer, mem), 0, convert(cl.Buffer, a.data[]), a.offset * Base.elsize(a), m * sizeof(T); blocking = false)
                 end
             end
         end
diff --git a/src/memory.jl b/src/memory.jl
index 94d8e44..e13aed5 100644
--- a/src/memory.jl
+++ b/src/memory.jl
@@ -113,7 +113,7 @@ end
 
 
 ## public interface
-function managed_alloc(t::Type{T}, bytes::Int; kwargs...) where T
+function managed_alloc(t::Type{T}, bytes::Int; kwargs...) where {T}
     if bytes == 0
         return Managed(T())
     else
diff --git a/src/util.jl b/src/util.jl
index 70a002f..98b1530 100644
--- a/src/util.jl
+++ b/src/util.jl
@@ -56,7 +56,7 @@ function versioninfo(io::IO=stdout)
     prefs = [
         "default_memory_backend" => load_preference(OpenCL, "default_memory_backend"),
     ]
-    if any(x->!isnothing(x[2]), prefs)
+    if any(x -> !isnothing(x[2]), prefs)
         println(io, "Preferences:")
         for (key, val) in prefs
             if !isnothing(val)
@@ -90,7 +90,7 @@ function versioninfo(io::IO=stdout)
                 if svm_caps.fine_grain_buffer
                     push!(svm_tags, "f")
                 end
-                push!(tags, "svm:"*join(svm_tags, "+"))
+                push!(tags, "svm:" * join(svm_tags, "+"))
             end
             if cl.usm_supported(device)
                 usm_tags = []
@@ -101,7 +101,7 @@ function versioninfo(io::IO=stdout)
                 if usm_caps.device.access
                     push!(usm_tags, "d")
                 end
-                push!(tags, "usm:"*join(usm_tags, "+"))
+                push!(tags, "usm:" * join(usm_tags, "+"))
             end
             if cl.bda_supported(device)
                 push!(tags, "bda")
diff --git a/test/buffer.jl b/test/buffer.jl
index 34245c9..6eca059 100644
--- a/test/buffer.jl
+++ b/test/buffer.jl
@@ -15,7 +15,7 @@
     end
 
     # host accessible, mapped
-    let buf = cl.Buffer(sizeof(Int); host_accessible=true)
+    let buf = cl.Buffer(sizeof(Int); host_accessible = true)
         src = [42]
         cl.enqueue_write(buf, pointer(src), sizeof(src); blocking=true)
 
@@ -65,7 +65,7 @@
     end
 
     # fill
-    let buf = cl.Buffer(3*sizeof(Int))
+    let buf = cl.Buffer(3 * sizeof(Int))
         cl.enqueue_fill(buf, 42, 3)
 
         arr = Vector{Int}(undef, 3) | 
    
| 
          @maleadt Can you take a look at the PR to see what's missing? Most tests pass now. Regarding BDA-specific tests, I think what we have for normal  Here's the latest one:  | 
    
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Couple of initial thoughts. I haven't had the time to thoroughly test this.
| 
           If PoCL supports this, we should probably have a way to pick the back-end (e.g. using a preference, asserting at run time that the device supports said back-end) and test this functionality on CI. The test failures are problematic though. Indexing really shouldn't behave differently on this back-end.  | 
    
          
 I fixed the indexing part. :) The only remaining errors come from: 
  | 
    
          
 PoCL does support this.  | 
    
| 
          @maleadt For now, I've introduced an environment-variable-based method to select the backend before loading OpenCL. This should be enough to run PoCL + BDA tests. All tests pass on my side. The PR should be ready if you don't have any issues. :)  | 
    
| 
           Taking a look...  | 
    
| 
          I have some fixes locally. I'm trying to unify some more things; I've always been annoyed by how the Buffer abstraction is pretty disjoint from our Memory wrappers.  | 
    
| 
           Pushed the refactor. WIP because there's a remaining issue: IIUC this used to work because the Buffer -> pointer conversion was postponed to launch time, while I'm doing the conversion earlier now. That's not ideal, because it also makes it impossible to pass buffers to kernels accepting pointers directly (as opposed to contained in structs) without the BDA extension. EDIT: Pushed a quick fix for that. But the question remains if we still want to support passing buffers directly. Maybe we should just ditch that and require one of the back-ends to be supported...  | 
    
          
 This makes sense from my POV. I say this because at the very least, SVM is mandated by the actual OpenCL spec. We should at least have the liberty to assume that we're working with a complete implementation of the very spec we're wrapping around here, or else it would get too complicated. What do you think?  | 
    
          
 I don't think it is, at least not for OpenCL 3.0+: KhronosGroup/OpenCL-Docs#913  | 
    
| 
           Okay, reading about it, it seems like in 3.0 they made a lot of things optional and brought it closer to 1.2... Well, whichever of the two options is easier and cleaner to maintain then I guess. :)  | 
    
| 
           I figured out a way to have the   | 
    
          
 I believe this will improve in the future if/when #319 works?  | 
    
          
 Sadly not, because even with  Besides, only OpenCL 2.0 technically supports passing pointers inside of structures, and despite NVIDIA and Intel's drivers advertising OpenCL 2.0 compatibility, their copies of   | 
    
| 
           I think this is good to go. @VarLad Any final thoughts on my changes / Anything else you want to add? Otherwise I think I'll merge this once CI agrees.  | 
    
| 
          @maleadt I think clCreateBufferWithProperties is provided by OpenCL 3.0, so maybe we can use just that and remove  Other than this, the code looks good; no obvious issues I can see.  | 
    
          
 Doesn't hurt to keep the old   | 
    
| 
           Ok, makes sense. Looks good to me, lets merge this! 👀  | 
    


More on the extension here: https://registry.khronos.org/OpenCL/specs/3.0-unified/html/OpenCL_API.html#cl_ext_buffer_device_address
This PR introduces a new backend for the array interface, and bumps the generated wrapper version for the OpenCL Headers.
Main purpose for the new backend is to be able to support more devices than currently possible.
This is heavily inspired by the recent merge of bda into Mesa's Rusticl. Specifically the Zink backend, which means OpenCL.jl can use Vulkan as a backend and ideally work on a large number of previously unsupported devices.
This is still a work in progress, and mostly meant for review or question purposes at the current stage. Current status: all tests pass. Feel free to try! This is supported both by PoCL 7.0 and by mesa-git using the environment variable
RUSTICL_ENABLE=zink (this can work with any compliant Vulkan driver)